import requests
import re
import json
import time
import xlwt

def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    Sends a GET request with a desktop-browser User-Agent and a 30 second
    timeout, then decodes the body with the encoding that requests guesses
    from the content (``apparent_encoding``).

    Args:
        url: Address to fetch.

    Returns:
        The decoded response text, or the sentinel string
        ``'getHTMLText error'`` when the request fails or the server replies
        with an HTTP error status. Callers treat the sentinel as a page that
        simply contains no matches.
    """
    print('url ===> ' + url)
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4295.400'
    try:
        r = requests.get(url, timeout=30, headers={'User-Agent': user_agent})
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel; Ctrl-C and
        # programming errors are no longer silently swallowed.
        return 'getHTMLText error'

def parsePage(ilt, html):
    """Scrape the detail page of every offer listed in a search-result page.

    Looks for ``"offerid":"<digits>"`` entries in ``html`` and calls
    ``parseGoodsDetail`` for each id found, pausing 10 seconds after each
    one.

    Args:
        ilt: Result list handed through to ``parseGoodsDetail``, which
            appends to it in place.
        html: Text of the search-result page.

    Returns:
        None. Failures are reported by printing ``'parsePage error'`` and
        stop processing of the remaining offers on this page.
    """
    try:
        # Capture the id with the regex itself rather than eval()-ing text
        # that came from a remote page. Empty ids are skipped because they
        # cannot form a valid detail URL.
        offer_ids = re.findall(r'"offerid":"([\d.]+)"', html)
        for offerid in offer_ids:
            parseGoodsDetail(ilt, offerid)
            time.sleep(10)
    except Exception:
        # Best-effort boundary: keep the crawl going on unexpected errors,
        # but let KeyboardInterrupt/SystemExit propagate so the long sleeps
        # can be interrupted.
        print('parsePage error')

def parseGoodsDetail(ilt, id):
    """Fetch one offer's detail page and append its SKU rows to ``ilt``.

    The page title (the text before ``-阿里巴巴`` in ``<title>``), every
    ``data-sku-config`` JSON attribute and every price span are extracted
    with regular expressions. The i-th SKU is paired with the i-th price.

    Args:
        ilt: List to extend in place; each appended row is
            ``[title, skuName, price]`` with ``price`` kept as the matched
            string.
        id: Offer id as a string, inserted into the detail-page URL.

    Returns:
        None. When the page cannot be parsed, ``'parseGoodsDetail error'``
        is printed; rows appended before the failure are kept.
    """
    eurl = 'https://detail.1688.com/offer/' + id + '.html?spm=a312h.2018_new_sem.dh_002.1.1c856a34PmIKXy&tracelog=p4p&clickid=9e9e6d2e90b94edb9b6586aac927257e&sessionid=43e9503d1b17f7f7e142c10232bebca9'
    try:
        detail = getHTMLText(eurl)
        title_match = re.search(r'<title>(.*?)-阿里巴巴</title>', detail)
        if title_match is None:
            # No recognizable title: the fetch failed or the layout changed.
            print('parseGoodsDetail error')
            return
        title = title_match.group(1)
        datasku = re.findall(r'data-sku-config=\'(.*?)\'>', detail)
        priceunit = re.findall(r'<span><em class="value">(\d+\.?\d*)</em><em class="price-unit">元</em></span>', detail)

        for i, raw_sku in enumerate(datasku):
            sku = json.loads(raw_sku)
            # priceunit[i] raises IndexError when the page has fewer prices
            # than SKUs; that is reported below.
            ilt.append([title, sku['skuName'], priceunit[i]])
    except (IndexError, KeyError, TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; KeyError/TypeError cover
        # SKU JSON that lacks 'skuName' or is not an object.
        print('parseGoodsDetail error')


def printGoodsList(worksheet, count, ilt):
    """Write result rows to the worksheet and echo them to stdout.

    Each entry of ``ilt`` is written to the row after the previous one,
    starting at row ``count + 1``. Column 0 receives the running number and
    columns 1-3 receive the first three items of the entry.

    Args:
        worksheet: Object with a ``write(row, col, value)`` method.
        count: Number of the last row already used; numbering continues
            from ``count + 1``.
        ilt: Iterable of indexable rows with at least three items.

    Returns:
        The number of the last row written (``count`` unchanged when ``ilt``
        is empty), so the caller can continue below it on the next call.
    """
    tplt = '{:4}\t{:16}\t{:8}\t{}'
    for info in ilt:
        count += 1
        row_values = (count, info[0], info[1], info[2])
        for column, value in enumerate(row_values):
            worksheet.write(count, column, value)
        print(tplt.format(*row_values))
    return count

def main():
    """Crawl 20 search-result pages for one keyword and save them to .xls.

    Prints a header line, creates a workbook with a header row, then for
    pages 1-20 fetches the search page, scrapes every offer on it and
    writes the rows to the sheet and to stdout. The workbook is saved under
    ``<unix timestamp>.xls`` in the current directory, including when the
    crawl stops early.
    """
    tplt = '{:4}\t{:16}\t{:8}\t{}'
    headings = ('序号', '商品名称', '规格', '价格')
    print(tplt.format(*headings))

    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('Sheet 1', cell_overwrite_ok=True)
    for column, heading in enumerate(headings):
        worksheet.write(0, column, heading)

    goods = '空白卷轴'
    count = 0
    try:
        for i in range(1, 21):
            url = 'https://p4psearch.1688.com/p4p114/p4psearch/offer.htm?keywords=' + goods + '&beginPage=' + str(i) + '#sm-filtbar'
            html = getHTMLText(url)
            infoList = []
            time.sleep(5)
            parsePage(infoList, html)
            printGoodsList(worksheet, count, infoList)
            # Advance the running row number here; otherwise every page
            # starts again at row 1 and overwrites the previous page.
            count += len(infoList)
    except Exception:
        print('main error')
    finally:
        # Save whatever was collected, even if the crawl is interrupted.
        workbook.save(str(int(time.time())) + '.xls')

# Start the crawl only when this file is executed as a script, so importing
# the module does not trigger network requests or write a file.
if __name__ == '__main__':
    main()
